In [1]:
import pandas as pd
import statsmodels.api as sm

# Example data sets, one per kind of response variable.
# The get_rdataset() calls download from the Rdatasets collection, so they
# need network access. The commented-out print() lines show how to inspect
# what was loaded (`.__doc__` for the description, `.data` for the frame).

# Normal response variable
stackloss_conversion = sm.datasets.get_rdataset("stackloss", "datasets")
# print(stackloss_conversion.__doc__)

# Lognormal response variable (bundled with statsmodels, no download)
engel_food = sm.datasets.engel.load_pandas()
# print(engel_food.data)

# Binary response variable
titanic_survival = sm.datasets.get_rdataset("Titanic", "datasets")
# print(titanic_survival.__doc__)

# Continuous 0-1 response variable
# Rdatasets files the data sets that used to ship with R's `car` package
# under "carData"; asking for package "car" no longer finds Duncan.
duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData")
# print(duncan_prestige.__doc__)

# Categorical response variable
# "datasets" is get_rdataset's default package; spelled out to match the
# other calls.
iris_flowers = sm.datasets.get_rdataset("iris", "datasets")
# print(iris_flowers.__doc__)

General guides to Bayesian regression

http://twiecki.github.io/blog/2015/11/10/mcmc-sampling/


In [4]:
# Showing plots inline, rather than in a new window
%matplotlib inline 

# Modules
# NOTE(review): both wildcard imports put every public name of their package
# into the notebook namespace. Where the two overlap, ggplot's name wins
# because it is imported last. They are left as-is because later cells may
# rely on the bare names (e.g. ggplot, aes, geom_point below).
# pandas (`pd`), used further down in this cell, is imported in the first cell.
from pymc3 import  *

import numpy as np
from ggplot import *

# Generating data
# Simulate `size` points from y = true_intercept + true_slope * x + noise.
size = 200
true_intercept = 1
true_slope = 2

# Dedicated, seeded generator: the simulated noise (and so the plot and
# anything fitted to these data) is identical on every Restart & Run All,
# and the global NumPy random state is left untouched.
sim_seed = 20151110
sim_rng = np.random.RandomState(sim_seed)

x = np.linspace(0, 1, size)
# y = a + b*x
true_regression_line = true_intercept + true_slope * x
# add Gaussian noise (standard deviation 0.5)
y = true_regression_line + sim_rng.normal(scale=.5, size=size)


# Plotting data
sim_data = pd.DataFrame({"x" : x, "y" : y})

# Scatter of the noisy sample with the true (noise-free) line drawn on top.
sim_plot = ggplot(sim_data, aes(x="x", y="y")) + geom_point() +\
    geom_abline(intercept=true_intercept, slope=true_slope)

# NOTE(review): printing the ggplot object is presumably what renders the
# figure inline; the text output is just its repr.
print(sim_plot)


            x         y
0    0.000000  0.905601
1    0.005025  1.258358
2    0.010050  1.138474
3    0.015075  0.040270
4    0.020101  1.300725
5    0.025126  1.383151
6    0.030151  0.781695
7    0.035176  1.691216
8    0.040201  1.339529
9    0.045226  1.437806
10   0.050251  1.168217
11   0.055276  1.079443
12   0.060302  1.724175
13   0.065327  1.378221
14   0.070352  0.890892
15   0.075377  1.683178
16   0.080402  1.044670
17   0.085427  1.421334
18   0.090452  0.816687
19   0.095477  1.202203
20   0.100503  1.303426
21   0.105528  0.686893
22   0.110553  1.363057
23   0.115578  0.921237
24   0.120603  1.513383
25   0.125628  1.691983
26   0.130653  1.208474
27   0.135678  1.619693
28   0.140704  1.433118
29   0.145729  1.760959
..        ...       ...
170  0.854271  2.952946
171  0.859296  1.926038
172  0.864322  2.351359
173  0.869347  2.858340
174  0.874372  2.635869
175  0.879397  2.445086
176  0.884422  2.981224
177  0.889447  2.452418
178  0.894472  3.208470
179  0.899497  3.193329
180  0.904523  2.374825
181  0.909548  1.587553
182  0.914573  2.871383
183  0.919598  3.187504
184  0.924623  2.787407
185  0.929648  2.827244
186  0.934673  2.404022
187  0.939698  2.343381
188  0.944724  1.621443
189  0.949749  3.568309
190  0.954774  3.040782
191  0.959799  3.501753
192  0.964824  2.861555
193  0.969849  2.680906
194  0.974874  2.921949
195  0.979899  2.849662
196  0.984925  2.493336
197  0.989950  2.572304
198  0.994975  2.515720
199  1.000000  3.674289

[200 rows x 2 columns]
/usr/local/lib/python3.4/dist-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
<ggplot: (-9223363287798574781)>

BayesPy

http://www.bayespy.org/intro.html

Note: installing BayesPy fails in this environment.